@article{10.1016/j.csbj.2016.07.002,
  author  = {Huang, Y. and Chen, S. Y. and Deng, F.},
  title   = {Well-characterized sequence features of eukaryote genomes and implications for ab initio gene prediction},
  journal = {Computational and Structural Biotechnology Journal},
  volume  = {14},
  pages   = {298--303},
  year    = {2016},
  month   = jul,
  day     = {27},
  doi     = {10.1016/j.csbj.2016.07.002}
}

@article{10.1093/nar/gki937,
  author   = {Lomsadze, Alexandre and Ter-Hovhannisyan, Vardges and Chernoff, Yury O. and Borodovsky, Mark},
  title    = {Gene identification in novel eukaryotic genomes by self-training algorithm},
  journal  = {Nucleic Acids Research},
  volume   = {33},
  number   = {20},
  pages    = {6494--6506},
  year     = {2005},
  month    = nov,
  abstract = {Finding new protein-coding genes is one of the most important goals of eukaryotic genome sequencing projects. However, genomic organization of novel eukaryotic genomes is diverse and ab initio gene finding tools tuned up for previously studied species are rarely suitable for efficacious gene hunting in DNA sequences of a new genome. Gene identification methods based on cDNA and expressed sequence tag (EST) mapping to genomic DNA or those using alignments to closely related genomes rely either on existence of abundant cDNA and EST data and/or availability on reference genomes. Conventional statistical ab initio methods require large training sets of validated genes for estimating gene model parameters. In practice, neither one of these types of data may be available in sufficient amount until rather late stages of the novel genome sequencing. Nevertheless, we have shown that gene finding in eukaryotic genomes could be carried out in parallel with statistical models estimation directly from yet anonymous genomic DNA. The suggested method of parallelization of gene prediction with the model parameters estimation follows the path of the iterative Viterbi training. Rounds of genomic sequence labeling into coding and non-coding regions are followed by the rounds of model parameters estimation. Several dynamically changing restrictions on the possible range of model parameters are added to filter out fluctuations in the initial steps of the algorithm that could redirect the iteration process away from the biologically relevant point in parameter space. Tests on well-studied eukaryotic genomes have shown that the new method performs comparably or better than conventional methods where the supervised model training precedes the gene prediction step. Several novel genomes have been analyzed and biologically interesting findings are discussed. Thus, a self-training algorithm that had been assumed feasible only for prokaryotic genomes has now been developed for ab initio eukaryotic gene identification.},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gki937},
  url      = {https://doi.org/10.1093/nar/gki937},
  eprint   = {https://academic.oup.com/nar/article-pdf/33/20/6494/3777739/gki937.pdf}
}

@article{10.1093/nar/gki458,
  author   = {Stanke, Mario and Morgenstern, Burkhard},
  title    = {{AUGUSTUS}: a web server for gene prediction in eukaryotes that allows user-defined constraints},
  journal  = {Nucleic Acids Research},
  volume   = {33},
  number   = {suppl\_2},
  pages    = {W465--W467},
  year     = {2005},
  month    = jul,
  abstract = {We present a WWW server for AUGUSTUS, a software for gene prediction in eukaryotic genomic sequences that is based on a generalized hidden Markov model, a probabilistic model of a sequence and its gene structure. The web server allows the user to impose constraints on the predicted gene structure. A constraint can specify the position of a splice site, a translation initiation site or a stop codon. Furthermore, it is possible to specify the position of known exons and intervals that are known to be exonic or intronic sequence. The number of constraints is arbitrary and constraints can be combined in order to pin down larger parts of the predicted gene structure. The result then is the most likely gene structure that complies with all given user constraints, if such a gene structure exists. The specification of constraints is useful when part of the gene structure is known, e.g. by expressed sequence tag or protein sequence alignments, or if the user wants to change the default prediction. The web interface and the downloadable stand-alone program are available free of charge at http://augustus.gobics.de/submission .},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gki458},
  url      = {https://doi.org/10.1093/nar/gki458},
  eprint   = {https://academic.oup.com/nar/article-pdf/33/suppl\_2/W465/7623404/gki458.pdf}
}

@article{doi:10.1002/0471250953.bi0403s18,
  author   = {Blanco, Enrique and Parra, Gen{\'\i}s and Guig{\'o}, Roderic},
  title    = {Using {geneid} to Identify Genes},
  journal  = {Current Protocols in Bioinformatics},
  volume   = {18},
  number   = {1},
  pages    = {4.3.1--4.3.28},
  year     = {2007},
  keywords = {Gene identification, genes, exons, splicing, genome annotation, bioinformatics},
  doi      = {10.1002/0471250953.bi0403s18},
  url      = {https://currentprotocols.onlinelibrary.wiley.com/doi/abs/10.1002/0471250953.bi0403s18},
  eprint   = {https://currentprotocols.onlinelibrary.wiley.com/doi/pdf/10.1002/0471250953.bi0403s18},
  abstract = {This unit describes the usage of geneid, an efficient gene-finding program that allows for the analysis of large genomic sequences, including whole mammalian chromosomes. These sequences can be partially annotated, and geneid can be used to refine this initial annotation. Training geneid is relatively easy, and parameter configurations exist for a number of eukaryotic species. Geneid produces output in a variety of standard formats. The results, thus, can be processed by a variety of software tools, including visualization programs. Geneid software is in the public domain, and it is undergoing constant development. It is easy to install and use. Exhaustive benchmark evaluations show that geneid compares favorably with other existing gene finding tools.}
}
@article{pmid32489650,
  author  = {Pertea, Geo and Pertea, Mihaela},
  title   = {{GFF} Utilities: {GffRead} and {GffCompare}},
  journal = {F1000Research},
  volume  = {9},
  pages   = {304},
  year    = {2020}
}